//read in and clean big census/ACS files

***************Census***************
// Census build: merge the supplementary extracts onto the base file, apply
// sample restrictions, deflate wage income, flag weak labor-force attachment,
// winsorize, then save the sample both with and without the flagged rows.
// NOTE(review): the 1:1 merges below key on serial pernum only (no year), so
// they presumably rely on these files covering a single year -- confirm.
use "$data/ACS/usa_00008.dta", clear
merge 1:1 serial pernum using "$data/ACS/usa_00009", nogen //merge on fips codes
merge 1:1 serial pernum using "$data/ACS/usa_00010", nogen //merge on weeks worked
merge 1:1 serial pernum using "$data/ACS/usa_00014", nogen //merge on bpls
merge 1:1 serial pernum using "$data/ACS/usa_00015", nogen //merge on relate values
//merge 1:1 serial pernum using "$data/ACS/usa_00049", nogen //merge on relate values
merge 1:1 serial pernum using "$data/ACS/census_nchild", keep(match) nogen //only this merge discards unmatched rows

//basic restrictions
drop if incwage == 999999 //standard (presumably the not-applicable code -- confirm against the codebook)
drop if educ<3 //drop if negligible or indeterminate education
keep if bpl<100 //keep us natives; also drops rows with missing bpl
//drop if citizen == 3
drop if statefip == 11 | bpl == 11 //kill DC
keep if age>=18 & age<=72
drop if gq>2 //also drops rows with missing gq, since missing sorts above every number

keep if race == 1 | race == 2 | hispan>0

//three-way race/ethnicity code: 1 = default, 2 = race 2 and hispan 0, 3 = any hispan>0
//(the hispan>0 assignment comes last, so it overrides the other two)
gen racetype = 1
replace racetype = 2 if race == 2 & hispan == 0
replace racetype = 3 if hispan>0
tab racetype

//deflate
merge m:1 year using "$data/GDP/gdp_pce_deflator", nogen keep(match)
replace deflator = deflator/100
replace incwage = incwage/deflator //deflation
//NOTE(review): if the deflator is indexed to 100 in its base year and 19.152 is
//its 1968 value, converting base-year dollars to 1968 dollars would multiply by
//(19.152 / 100) rather than (100 / 19.152). Confirm against gdp_pce_deflator
//before changing: the thresholds below decide which rows get flagged.
gen incwage_1968 = incwage * (100 / 19.152)

//hours/wages restrictions
gen hours = wkswork1*uhrswork
gen flag_drop = 0
replace flag_drop = 1 if age>=36 & (hours<520 | incwage_1968<1500) //adequate work, age 36 and over
replace flag_drop = 1 if age<36 & (hours<260 | incwage_1968<1000) //adequate work, under 36 (lower thresholds)
replace flag_drop = 1 if hours>5280 //also flags missing hours, since missing sorts above every number
replace flag_drop = 1 if incwage<=0

//winsorize income at the unweighted 99th percentile, computed before flagged rows are dropped
summarize incwage, detail
//hold the cutoff in a scalar: macro expansion of r(p99) round-trips through text
scalar incwage_p99 = r(p99)
//the missing() guard keeps a missing incwage from being overwritten with the cutoff
replace incwage = scalar(incwage_p99) if incwage > scalar(incwage_p99) & !missing(incwage)
scalar drop incwage_p99
su incwage

//hourly wage variable (missing when hours is 0 or missing)
gen wage_hourly = incwage / hours
gen coll = (educ>=10)

//compress and save
compress
save "$temp/census_p25_sample", replace //still contains the flagged rows
drop if flag_drop == 1
save "$temp/census_master", replace //flag_drop is kept as a variable here


// Reload the saved Census master file and print frequency-weighted summaries
// of wage income, rescaled by 47961, for ages 18-36 split by the coll
// indicator. Nothing in this block saves the rescaled variable.
use "$temp/census_master", clear

replace incwage = incwage / 47961
summarize incwage [fweight = perwt] if inrange(age, 18, 36) & coll != 0
summarize incwage [fweight = perwt] if inrange(age, 18, 36) & coll == 0

***************ACS**************
// ACS build: restrict the base extract, merge on hours, deflate wage income,
// flag weak labor-force attachment, winsorize, then save three files:
// all people, the race-restricted sample, and the master sample without
// flagged rows.
use "$data/ACS/usa_00002", clear
keep serial pernum perwt year age bpl statefip incwage marst relate educ race sex hispan gq school //variable limitation
//drop if incwage <= 0 //standard
drop if educ<3 //drop if negligible or indeterminate education
drop if statefip == 11 | bpl == 11 //kill DC
keep if bpl<100 //keep us natives; also drops rows with missing bpl
keep if age>=18 & age<=72
drop if incwage==999999 //presumably the not-applicable code -- confirm against the codebook
drop if gq>2 //also drops rows with missing gq, since missing sorts above every number
//uhrswork and wkswork2 are not in the keep list above, so they must come from this merge
merge 1:1 serial pernum year using "$data/ACS/usa_00028", keep(match) nogen //hours

//deflate
merge m:1 year using "$data/GDP/gdp_pce_deflator", nogen keep(match)
replace deflator = deflator/100
replace incwage = incwage/deflator //deflation
//NOTE(review): if the deflator is indexed to 100 in its base year and 19.152 is
//its 1968 value, converting base-year dollars to 1968 dollars would multiply by
//(19.152 / 100) rather than (100 / 19.152). Confirm against gdp_pce_deflator
//before changing: the thresholds below decide which rows get flagged.
gen incwage_1968 = incwage * (100 / 19.152)

//hours/wages restrictions
//map the interval-coded weeks variable (wkswork2) to a single weeks value;
//any code not listed, including missing, leaves wkswork1 at 0
gen wkswork1 = 0
replace wkswork1 = 10 if wkswork2 == 1
replace wkswork1 = 20 if wkswork2 == 2
replace wkswork1 = 30 if wkswork2 == 3
replace wkswork1 = 40 if wkswork2 == 4
replace wkswork1 = 48 if wkswork2 == 5
replace wkswork1 = 52 if wkswork2 == 6
gen hours = wkswork1 * uhrswork


gen flag_drop = 0
replace flag_drop = 1 if age>=36 & (hours<520 | incwage_1968<1500) //adequate work, age 36 and over
replace flag_drop = 1 if age<36 & (hours<260 | incwage_1968<1000) //adequate work, under 36 (lower thresholds)
replace flag_drop = 1 if hours>5280 //also flags missing hours, since missing sorts above every number
replace flag_drop = 1 if incwage<=0

//winsorize income at the unweighted 99th percentile, computed on all people before the race restriction
summarize incwage, detail
//hold the cutoff in a scalar: macro expansion of r(p99) round-trips through text
scalar incwage_p99 = r(p99)
//the missing() guard keeps a missing incwage from being overwritten with the cutoff
replace incwage = scalar(incwage_p99) if incwage > scalar(incwage_p99) & !missing(incwage)
scalar drop incwage_p99

//hourly wage variable (missing when hours is 0 or missing)
gen wage_hourly = incwage / hours
gen coll = (educ>=10)

compress
save "$temp/acs_all_people", replace //saved before the race restriction; flagged rows included


keep if race == 1 | race == 2 | hispan>0
//three-way race/ethnicity code: 1 = default, 2 = race 2 and hispan 0, 3 = any hispan>0
//(the hispan>0 assignment comes last, so it overrides the other two)
gen racetype = 1
replace racetype = 2 if race == 2 & hispan == 0
replace racetype = 3 if hispan>0
tab racetype




//compress and save
compress
save "$temp/acs_marriage_sample", replace //still contains the flagged rows
drop if flag_drop
drop flag_drop
save "$temp/acs_master", replace

//end of dofile